{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "import os\n",
    "import json\n",
    "import numpy as np\n",
    "import torch\n",
    "import xml.etree.ElementTree as ET\n",
    "import sqlite3\n",
    "import internal.utils.colmap as colmap"
   ],
   "id": "6b0a12d70589821d",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "basic_path = os.path.expanduser(\"~/data/image_set/dbl/\")\n",
    "image_dir_relative = \"AerialPhotography\"\n",
    "def image_path_to_name(image_path):\n",
    "    return image_path.split(\":\")[1][1:]"
   ],
   "id": "c004ebb717326aac",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "# Open XML\n",
    "Looks like the new version has renamed to iTwin"
   ],
   "id": "a84831b71727d41d"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# `fixed_pp` means the principle points are not adjusted\n",
    "tree = ET.parse(os.path.expanduser(os.path.join(basic_path, \"Smart3DExportedPoses-ENU-x_right-y_down-fixed_pp.xml\")))\n",
    "tree"
   ],
   "id": "74304f7618b4892e",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "root = tree.getroot()\n",
    "root"
   ],
   "id": "564032f0304fd7db",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "block = root.find(\"Block\")\n",
    "block"
   ],
   "id": "293a8897accbd6f4",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# Parse XML",
   "id": "562b6a7462cf150d"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "cameras = []\n",
    "image_paths = []\n",
    "image_names = []\n",
    "poses = []\n",
    "centers = []\n",
    "image_camera_ids = []\n",
    "for photogroup in block.findall(\"Photogroups/Photogroup\"):\n",
    "    imageDimensions = photogroup.find(\"ImageDimensions\")\n",
    "    width = int(imageDimensions.find(\"Width\").text)\n",
    "    height = int(imageDimensions.find(\"Height\").text)\n",
    "    focal_length = float(photogroup.find(\"FocalLength\").text)\n",
    "    sensor_size = float(photogroup.find(\"SensorSize\").text)\n",
    "\n",
    "    focal_length_in_pixel = focal_length / sensor_size * width\n",
    "\n",
    "    cameras.append({\n",
    "        \"width\": width,\n",
    "        \"height\": height,\n",
    "        \"focal_length\": focal_length_in_pixel,\n",
    "        \"principal_point\": (\n",
    "            float(photogroup.find(\"PrincipalPoint/x\").text), float(photogroup.find(\"PrincipalPoint/y\").text)),\n",
    "        \"distortion\": {i.tag: float(i.text) for i in photogroup.find(\"Distortion\")}\n",
    "    })\n",
    "    camera_idx = len(cameras) - 1\n",
    "\n",
    "    for photo in photogroup.findall(\"Photo\"):\n",
    "        rotation = list(photo.find(\"Pose/Rotation\"))\n",
    "        center = list(photo.find(\"Pose/Center\"))\n",
    "        if rotation[-1].text == \"false\":\n",
    "            continue\n",
    "        if center[-1].text == \"false\":\n",
    "            continue\n",
    "        image_paths.append(photo.find(\"ImagePath\").text)\n",
    "        image_names.append(image_path_to_name(photo.find(\"ImagePath\").text))\n",
    "        poses.append([float(i.text) for i in rotation[:-1]])\n",
    "        centers.append([float(i.text) for i in center[:-1]])\n",
    "        image_camera_ids.append(camera_idx)\n",
    "cameras"
   ],
   "id": "5f979783c6d586d9",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "poses[0], centers[0], image_paths[0]",
   "id": "1cc2734f5d93bff0",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# Convert",
   "id": "b44a790d76e48f0c"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "pose_reshaped = torch.tensor(poses, dtype=torch.float64).reshape((-1, 3, 3))\n",
    "pose_reshaped.shape, pose_reshaped[0]"
   ],
   "id": "5d1532e05a9ddcbc",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "c2w_rotations = torch.transpose(pose_reshaped, 1, 2)\n",
    "(c2w_rotations[0] == pose_reshaped[0].T).all(), c2w_rotations[0]"
   ],
   "id": "9507dd8d6ba4d103",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "c2w = torch.concat([\n",
    "    torch.concat([c2w_rotations, torch.tensor(centers, dtype=torch.float64)[..., None]], dim=-1),\n",
    "    torch.tensor([0., 0., 0., 1.], dtype=torch.float64)[None, None, :].repeat(c2w_rotations.shape[0], 1, 1),\n",
    "], dim=1)\n",
    "c2w.shape, c2w[1], centers[1]"
   ],
   "id": "f9a9364dfb58a71e",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "camera_centers = c2w[:, :3, 3]\n",
    "camera_centers[0], centers[0]"
   ],
   "id": "3d40762e577bc763",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# Rescale and Translation",
   "id": "e45017cbe57fab76"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "mean_center = torch.mean(camera_centers, dim=0)\n",
    "mean_center"
   ],
   "id": "7f1250052eb39e59",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "camera_center_min = torch.min(camera_centers, dim=0).values\n",
    "camera_center_max = torch.max(camera_centers, dim=0).values\n",
    "camera_center_range = camera_center_max - camera_center_min\n",
    "camera_center_range"
   ],
   "id": "f016e80918148f3e",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "mid_center = (camera_center_min + camera_center_max) * 0.5\n",
    "mid_center"
   ],
   "id": "cf91481ce8fc09a2",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "max_range = 100.\n",
    "scale = camera_center_range.max() / max_range\n",
    "scale"
   ],
   "id": "ebd996911ceefa5b",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "c2w_rescaled_and_moved = torch.clone(c2w)\n",
    "c2w_rescaled_and_moved[:, :3, 3] -= mid_center\n",
    "c2w_rescaled_and_moved[:, :3, 3] /= scale\n",
    "c2w[0], c2w_rescaled_and_moved[0]"
   ],
   "id": "d13612ee8b8e088a",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "torch.save({\n",
    "    \"image_names\": image_names,\n",
    "    \"cameras\": cameras,\n",
    "    \"c2w\": c2w,\n",
    "    \"image_camera_ids\": image_camera_ids,\n",
    "    \"center\": mid_center,\n",
    "    \"scale\": scale,\n",
    "}, os.path.join(basic_path, \"parsed_from_xml.pt\"))"
   ],
   "id": "cc6bd96ac2c72e99",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# Select a few for preview in NeRFStudio",
   "id": "6a0c49d0236e1626"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "distance2center = torch.norm(camera_centers - mean_center[None, :], dim=-1)\n",
    "distance2center"
   ],
   "id": "6d684eef4110036d",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "select_mask = distance2center < 128.\n",
    "select_mask.sum()"
   ],
   "id": "9e14c096c1b8f968",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "selected_image_ids = select_mask.nonzero().squeeze(-1)\n",
    "selected_image_ids"
   ],
   "id": "e32bfb069b31c6a0",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "camera_list = []\n",
    "for idx, pose in enumerate(c2w[select_mask]):\n",
    "    camera_list.append({\n",
    "        \"id\": idx,\n",
    "        \"img_name\": \"{:06d}\".format(idx),\n",
    "        \"width\": 1920,\n",
    "        \"height\": 1080,\n",
    "        \"position\": (pose[:3, 3] * 0.01).tolist(),\n",
    "        \"rotation\": pose[:3, :3].tolist(),\n",
    "        \"fx\": 1600,\n",
    "        \"fy\": 1600,\n",
    "        \"color\": [255, 0, 0],\n",
    "    })\n",
    "with open(os.path.join(os.path.expanduser(\"~/data/image_set/dbl\"), \"preview.json\"), \"w\") as f:\n",
    "    json.dump(camera_list, f)\n",
    "os.path.join(os.path.expanduser(\"~/data/image_set/dbl\"), \"preview.json\")"
   ],
   "id": "cbf3a66d3ff6e97f",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "transforms = {\n",
    "    \"aabb_scale\": 16,\n",
    "}\n",
    "\n",
    "frames = []\n",
    "for idx in selected_image_ids.tolist():\n",
    "    camera_id = image_camera_ids[idx]\n",
    "    file_path = os.path.join(image_dir_relative, image_names[idx])\n",
    "    camera = cameras[camera_id]\n",
    "    transform_matrix = torch.clone(c2w[idx])\n",
    "    transform_matrix[:, 1:3] *= -1\n",
    "    frames.append({\n",
    "        \"file_path\": file_path,\n",
    "        \"camera_model\": \"OPENCV\",\n",
    "        \"fl_x\": camera[\"focal_length\"],\n",
    "        \"fl_y\": camera[\"focal_length\"],\n",
    "        \"k1\": camera[\"distortion\"][\"K1\"],\n",
    "        \"k2\": camera[\"distortion\"][\"K2\"],\n",
    "        \"p1\": camera[\"distortion\"][\"P1\"],\n",
    "        \"p2\": camera[\"distortion\"][\"P2\"],\n",
    "        \"cx\": camera[\"width\"] // 2,\n",
    "        \"cy\": camera[\"height\"] // 2,\n",
    "        \"w\": camera[\"width\"],\n",
    "        \"h\": camera[\"height\"],\n",
    "        \"transform_matrix\": transform_matrix.tolist(),\n",
    "    })\n",
    "\n",
    "transforms[\"frames\"] = frames\n",
    "\n",
    "transforms_json_path = os.path.join(os.path.expanduser(\"~/data/image_set/dbl\"), \"transforms.json\")\n",
    "with open(transforms_json_path, \"w\") as f:\n",
    "    json.dump(transforms, f, indent=2)\n",
    "transforms_json_path"
   ],
   "id": "9b31640cbfb18e44",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# Colmap",
   "id": "7c8e70329b4e0752"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "colmap_output_path = os.path.join(basic_path, \"colmap\")\n",
    "colmap_image_path = os.path.join(basic_path, image_dir_relative)"
   ],
   "id": "c712dcb6d836f38a",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "extract features",
   "id": "a7b1dc6aeb7a0e7"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "colmap_db_path = os.path.join(colmap_output_path, \"colmap.db\")\n",
    "assert os.path.exists(colmap_db_path) is False\n",
    "print(\" \\\\\\n    \".join([\n",
    "    \"colmap\",\n",
    "    \"feature_extractor\",\n",
    "    \"--database_path=\" + colmap_db_path,\n",
    "    \"--image_path=\" + colmap_image_path,\n",
    "    \"--ImageReader.camera_model=OPENCV\",\n",
    "]))"
   ],
   "id": "287d774cc4a57b9c",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "create a sparse model from known poses",
   "id": "566d523ec1eb0f67"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "sparse_manually_model_dir = os.path.join(colmap_output_path, \"sparse_manually\")",
   "id": "1be723a16543039c",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "assert os.path.exists(sparse_manually_model_dir) is False\n",
    "assert os.path.exists(colmap_db_path + \"-shm\") is False, \"{} is opened by another process\".format(colmap_db_path)\n",
    "\n",
    "colmap_db = sqlite3.connect(colmap_db_path)\n",
    "\n",
    "def array_to_blob(array):\n",
    "    return array.tostring()\n",
    "\n",
    "\n",
    "def select_image(image_name: str):\n",
    "    cur = colmap_db.cursor()\n",
    "    try:\n",
    "        return cur.execute(\"SELECT image_id, camera_id FROM images WHERE name = ?\", [image_name]).fetchone()\n",
    "    finally:\n",
    "        cur.close()\n",
    "\n",
    "\n",
    "def set_image_camera_id(image_id: int, camera_id: int):\n",
    "    cur = colmap_db.cursor()\n",
    "    try:\n",
    "        cur.execute(\"UPDATE images SET camera_id = ? WHERE image_id = ?\", [camera_id, image_id])\n",
    "        colmap_db.commit()\n",
    "    finally:\n",
    "        cur.close()\n",
    "\n",
    "\n",
    "def update_camera_params(camera_id: int, params: np.ndarray):\n",
    "    cur = colmap_db.cursor()\n",
    "    try:\n",
    "        cur.execute(\"UPDATE cameras SET params = ? WHERE camera_id = ?\", [\n",
    "            array_to_blob(params),\n",
    "            camera_id,\n",
    "        ])\n",
    "        colmap_db.commit()\n",
    "    finally:\n",
    "        cur.close()\n",
    "\n",
    "\n",
    "def delete_unused_cameras():\n",
    "    cur = colmap_db.cursor()\n",
    "    try:\n",
    "        cur.execute(\"DELETE FROM cameras WHERE camera_id NOT IN (SELECT camera_id FROM images)\")\n",
    "        colmap_db.commit()\n",
    "    finally:\n",
    "        cur.close()"
   ],
   "id": "edf9c49745cdaa35",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "w2cs = torch.linalg.inv(c2w_rescaled_and_moved)\n",
    "w2cs[0], c2w_rescaled_and_moved[0]"
   ],
   "id": "5e7e718f131fcd46",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "colmap_cameras = {}\n",
    "colmap_images = {}\n",
    "context_camera_idx_to_colmap_camera_idx = {}\n",
    "\n",
    "for idx in range(c2w.shape[0]):\n",
    "    image_name = image_names[idx]\n",
    "    colmap_image_idx, colmap_camera_idx = select_image(image_name)\n",
    "    # share intrinsics\n",
    "    context_camera_idx = image_camera_ids[idx]\n",
    "    colmap_camera_idx = context_camera_idx_to_colmap_camera_idx.setdefault(context_camera_idx, colmap_camera_idx)\n",
    "    set_image_camera_id(colmap_image_idx, colmap_camera_idx)\n",
    "\n",
    "    w2c = w2cs[idx]\n",
    "\n",
    "    colmap_images[colmap_image_idx] = colmap.Image(\n",
    "        id=colmap_image_idx,\n",
    "        qvec=colmap.rotmat2qvec(w2c[:3, :3].numpy()),\n",
    "        tvec=w2c[:3, 3].numpy(),\n",
    "        camera_id=colmap_camera_idx,\n",
    "        name=image_name,\n",
    "        xys=np.array([], dtype=np.float64),\n",
    "        point3D_ids=np.asarray([], dtype=np.int64),\n",
    "    )\n",
    "\n",
    "    if colmap_camera_idx not in colmap_cameras:\n",
    "        camera = cameras[context_camera_idx]\n",
    "        # [fx, fy, cx, cy, k1, k2, p1, p2]\n",
    "        camera_params = torch.tensor([\n",
    "            camera[\"focal_length\"],\n",
    "            camera[\"focal_length\"],\n",
    "            camera[\"width\"] // 2,\n",
    "            camera[\"height\"] // 2,\n",
    "            camera[\"distortion\"][\"K1\"],\n",
    "            camera[\"distortion\"][\"K2\"],\n",
    "            camera[\"distortion\"][\"P1\"],\n",
    "            camera[\"distortion\"][\"P2\"],\n",
    "        ], dtype=torch.float64)\n",
    "        update_camera_params(colmap_camera_idx, camera_params.numpy())\n",
    "        colmap_cameras[colmap_camera_idx] = colmap.Camera(\n",
    "            id=colmap_camera_idx,\n",
    "            model=\"OPENCV\",\n",
    "            width=camera[\"width\"],\n",
    "            height=camera[\"height\"],\n",
    "            params=camera_params.numpy(),\n",
    "        )\n",
    "\n",
    "delete_unused_cameras()\n",
    "colmap_db.close()"
   ],
   "id": "4ed0f4f96318bf0e",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "os.makedirs(sparse_manually_model_dir)\n",
    "colmap.write_images_binary(colmap_images, os.path.join(sparse_manually_model_dir, \"images.bin\"))\n",
    "colmap.write_cameras_binary(colmap_cameras, os.path.join(sparse_manually_model_dir, \"cameras.bin\"))\n",
    "colmap.write_points3D_binary({}, os.path.join(sparse_manually_model_dir, \"points3D.bin\"))"
   ],
   "id": "c6c50335a5a8ae18",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "colmap.read_cameras_binary(os.path.join(sparse_manually_model_dir, \"cameras.bin\"))",
   "id": "c7a858c6d56da2a5",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "feature matcher",
   "id": "9876bf6f25d9bc2d"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "print(\" \\\\\\n    \".join([\n",
    "    \"colmap\",\n",
    "    \"vocab_tree_matcher\",\n",
    "    \"--database_path=\" + colmap_db_path,\n",
    "    \"--VocabTreeMatching.vocab_tree_path=\" + os.path.expanduser(\"~/.cache/colmap/vocab_tree_flickr100K_words256K.bin\"),\n",
    "]))"
   ],
   "id": "d7b519bac1e17317",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "point triangulator",
   "id": "a6aeac933ef8563b"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "sparse_dir_triangulated = os.path.join(colmap_output_path, \"sparse\")\n",
    "os.makedirs(sparse_dir_triangulated, exist_ok=True)\n",
    "print(\" \\\\\\n    \".join([\n",
    "        \"colmap\",\n",
    "        \"point_triangulator\",\n",
    "        \"--database_path=\" + colmap_db_path,\n",
    "        \"--image_path=\" + colmap_image_path,\n",
    "        \"--input_path=\" + sparse_manually_model_dir,\n",
    "        \"--output_path=\" + sparse_dir_triangulated,\n",
    "]))"
   ],
   "id": "4f71c83132006143",
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
